# Clean the raw baccalaureate (high-school graduation) files, one per year.
#
# For every year in `years` this function:
#   1. reads "bacalaureat_<year>.csv" (semicolon-separated, UTF-8) from
#      `wd_data_raw`;
#   2. adds an `id` column holding the row names of the freshly read data;
#   3. loads the helpers `clean__bac__clean_names` and `clean__bac__clean_hs`
#      from `wd_code` and applies them;
#   4. trims the `judet` (county) column and, for years before 2018, replaces
#      it with the `long_name` found in "county_mapping.rds"
#      (matched on `short_name`);
#   5. renames `judet` to `judet_bac` and saves the result as
#      "data_bac_raw<year>.rds" in `wd_data_intermediate`.
#
# Relies on these variables being defined by the caller's setup code:
# `wd_data_raw_other`, `wd_data_raw`, `wd_code`, `wd_data_intermediate`.
#
# Args:
#   years: vector of graduation years to process (compared numerically
#          against 2018).
#
# Returns:
#   NULL, invisibly. The function is called for its side effect of writing
#   one .rds file per year. The working directory and locale that were active
#   on entry are restored on exit.
#
# 2017 should be run first; this is because it has no missing names and
# locations.
clean__bac__main <- function(years) {
  # Restore the caller's working directory when we are done (the helper
  # scripts are loaded relative to `wd_code`, so we still have to change it).
  old_wd <- getwd()
  on.exit(setwd(old_wd), add = TRUE)

  # Restore the caller's locale as well. Categories are saved one by one
  # because the combined "LC_ALL" string is not guaranteed to be accepted
  # back by Sys.setlocale().
  locale_categories <- c("LC_COLLATE", "LC_CTYPE", "LC_MONETARY", "LC_TIME")
  old_locale <- vapply(locale_categories, Sys.getlocale, character(1))
  on.exit(
    {
      for (category in names(old_locale)) {
        Sys.setlocale(category, old_locale[[category]])
      }
    },
    add = TRUE
  )

  # Read county mapping (short_name -> long_name)
  county_mapping <- readRDS(file.path(wd_data_raw_other, "county_mapping.rds"))

  # Import, clean and add town to HS graduation file; `i` is the year of
  # HS graduation.
  for (i in years) {
    print(i)
    Sys.setlocale(locale = "English")
    graduation_file <- paste0("bacalaureat_", i, ".csv")
    data_bac_raw <- read.csv(
      file.path(wd_data_raw, graduation_file),
      sep = ";",
      encoding = "UTF-8",
      stringsAsFactors = FALSE
    )
    data_bac_raw$id <- rownames(data_bac_raw)

    # Inventories of every distinct character used in names / school names.
    # They are not used below, but the helpers loaded further down are
    # evaluated in this function's frame and can therefore see them.
    # NOTE(review): confirm whether the helpers actually read these.
    char_name_bac <- sort(unique(unlist(unique(
      strsplit(data_bac_raw$nume, "")
    ))))
    char_school_bac <- sort(unique(unlist(unique(
      strsplit(data_bac_raw$unitate_de_invatamant, "")
    ))))

    # The helper scripts are parsed and run under the Romanian locale, from
    # the code directory, exactly as before.
    Sys.setlocale(locale = "Romanian")
    setwd(wd_code)
    eval(parse("clean__bac__clean_names.R", encoding = "UTF-8"))
    eval(parse("clean__bac__clean_hs.R", encoding = "UTF-8"))

    data_bac_raw$nume <- clean__bac__clean_names(data_bac_raw)
    print("bac name cleaned")
    data_bac_raw <- clean__bac__clean_hs(data_bac_raw, i)
    print("hs cleaned")
    data_bac_raw$judet <- trimws(data_bac_raw$judet)

    if (i < 2018) {
      # The merge below is an inner join, so rows whose county code is not in
      # the mapping are dropped. Say so instead of losing them silently.
      unmatched <- setdiff(data_bac_raw$judet, county_mapping$short_name)
      if (length(unmatched) > 0) {
        warning(
          "Year ", i, ": dropping rows whose county code has no entry in ",
          "county_mapping: ", paste(unmatched, collapse = ", "),
          call. = FALSE
        )
      }
      data_bac_raw <- base::merge(
        data_bac_raw,
        county_mapping,
        by.x = "judet",
        by.y = "short_name",
        suffixes = c("", "")
      )
      # Replace the short county code with its long name.
      data_bac_raw$judet <- as.character(data_bac_raw$long_name)
      data_bac_raw <- data_bac_raw[
        ,
        colnames(data_bac_raw) != "long_name",
        drop = FALSE
      ]
    }
    names(data_bac_raw)[names(data_bac_raw) == "judet"] <- "judet_bac"

    # Save data
    graduation_file_final <- paste0("data_bac_raw", i)
    saveRDS(
      data_bac_raw,
      file = file.path(
        wd_data_intermediate,
        paste0(graduation_file_final, ".rds")
      )
    )
  }

  invisible(NULL)
}